import numpy as np
import pandas as pd
import scipy.stats as ss
import matplotlib.pyplot as plt
from matplotlib import pylab
import pickle, glob, os
%matplotlib inline
# Default (width, height) used for every figure that does not pass its own figsize.
pylab.rcParams['figure.figsize'] = (16, 5)
# Star subtypes handled by this notebook; each name is spliced into the
# dataset paths (as a file stem for VVV, as a directory for OGLE/Corot).
star_types = ['RRab', 'RRc', 'RRd', 'RRe']

# Column labels assigned, in this exact order, to the header-less VVV feature
# files by open_vista. Do not reorder: position decides which column gets
# which name.
fats_col = [
    'Amplitude', 'AndersonDarling', 'Autocor_length', 'Beyond1Std',
    'CAR_mean', 'CAR_sigma', 'CAR_tau', 'Con', 'Eta_e',
    'FluxPercentileRatioMid20', 'FluxPercentileRatioMid35',
    'FluxPercentileRatioMid50', 'FluxPercentileRatioMid65',
    'FluxPercentileRatioMid80',
    'Freq1_harmonics_amplitude_0', 'Freq1_harmonics_amplitude_1',
    'Freq1_harmonics_amplitude_2', 'Freq1_harmonics_amplitude_3',
    'Freq1_harmonics_rel_phase_0', 'Freq1_harmonics_rel_phase_1',
    'Freq1_harmonics_rel_phase_2', 'Freq1_harmonics_rel_phase_3',
    'Freq2_harmonics_amplitude_0', 'Freq2_harmonics_amplitude_1',
    'Freq2_harmonics_amplitude_2', 'Freq2_harmonics_amplitude_3',
    'Freq2_harmonics_rel_phase_0', 'Freq2_harmonics_rel_phase_1',
    'Freq2_harmonics_rel_phase_2', 'Freq2_harmonics_rel_phase_3',
    'Freq3_harmonics_amplitude_0', 'Freq3_harmonics_amplitude_1',
    'Freq3_harmonics_amplitude_2', 'Freq3_harmonics_amplitude_3',
    'Freq3_harmonics_rel_phase_0', 'Freq3_harmonics_rel_phase_1',
    'Freq3_harmonics_rel_phase_2', 'Freq3_harmonics_rel_phase_3',
    'LinearTrend', 'MaxSlope', 'Mean', 'Meanvariance', 'MedianAbsDev',
    'MedianBRP', 'PairSlopeTrend', 'PercentAmplitude',
    'PercentDifferenceFluxPercentile', 'PeriodLS', 'Period_fit', 'Psi_CS',
    'Psi_eta', 'Q31', 'Rcs', 'Skew', 'SlottedA_length', 'SmallKurtosis',
    'Std', 'StetsonK', 'StetsonK_AC',
]

# Features compared across the three surveys by mostrar, in display order.
# Four entries used to be misspelled ('PeriodLs', 'Autocolor_length',
# 'CAR_tmean', 'FluxPercentileRatioMid60'); they could never match a
# fats_col column, so those features were silently skipped. They now use the
# fats_col spelling.
# NOTE: 'Color' and 'StetsonL' are not in fats_col, so they are still skipped
# whenever the VVV table is involved -- presumably they only exist in the
# other surveys; confirm whether they should stay in this list.
fats_col_2 = [
    'Color', 'Psi_CS', 'Psi_eta', 'SlottedA_length', 'StetsonL', 'PeriodLS',
    'Rcs', 'StetsonK_AC', 'CAR_tau', 'StetsonK',
    'FluxPercentileRatioMid50', 'FluxPercentileRatioMid65', 'CAR_mean',
    'Skew', 'Mean', 'Period_fit', 'Eta_e', 'Autocor_length',
    'FluxPercentileRatioMid35', 'FluxPercentileRatioMid20',
    'FluxPercentileRatioMid80', 'Beyond1Std', 'MedianBRP', 'SmallKurtosis',
]
# File extensions of the per-survey feature files
vista_ext, ogle_ext, corot_ext = '.txt', '.pkl', '.csv'

# Corot sub-directory to read (original note: "points in Corot"). Kept as a
# string because it is concatenated into the glob pattern.
corot_n = '3000'
# Upper bound on how many OGLE files are taken from a single glob result
ogle_data = 5000

# Histogram colours per survey.
# NOTE: `corot_colot` is a misspelling of "color", but other code in this file
# refers to it by this exact name, so it is kept as-is.
ogle_color, vista_color, corot_colot = 'blue', 'red', 'yellow'

# Default quantile passed to filter_array as `pcnt`
outlier = 0.02
def open_vista(path):
    """Load one VVV feature table.

    `path` is read as a header-less CSV and its columns are labelled with
    `fats_col`. When `path` does not exist an empty DataFrame is returned
    instead of raising.
    """
    if not os.path.exists(path):
        return pd.DataFrame([])
    table = pd.read_csv(path, header=None)
    # Labels are positional; pandas rejects the assignment if the file's
    # column count differs from len(fats_col).
    table.columns = fats_col
    return table
def read_pkl(path):
    """Return the object stored in the pickle file at `path`."""
    # NOTE(review): unpickling executes code embedded in the file; only use
    # this on feature files you produced or otherwise trust.
    with open(path, 'rb') as handle:
        return pickle.load(handle)
def open_ogle(files):
    """Merge per-star OGLE pickle files into one dictionary of lists.

    Each file is loaded with read_pkl and is expected to expose `.items()`
    (key -> value). The result maps every key to the list of values collected
    for it, in the order the files are given.

    Returns `{}` when `files` is empty.

    Values are appended in place, so the cost is linear in the number of
    files (the previous `list + [value]` form re-copied each list per file).
    A key that is missing from the first file is now collected too instead of
    raising KeyError. Lists can have different lengths when files do not all
    share the same keys.
    """
    if not files:
        return {}
    merged = {}
    for path in files:
        for key, value in read_pkl(path).items():
            merged.setdefault(key, []).append(value)
    return merged
def open_corot(files):
    """Read every Corot CSV in `files` and stack them into one DataFrame.

    Rows keep the index they had in their own file (indices are not reset),
    matching what chained `DataFrame.append` calls produced.

    Returns `{}` (an empty dict, not a DataFrame) when `files` is empty; that
    return value is kept for backward compatibility with existing callers.
    """
    if len(files) == 0:
        return {}
    # A single concat replaces the append-in-a-loop: DataFrame.append was
    # removed in pandas 2.0 and re-copied the accumulated frame per file.
    return pd.concat([pd.read_csv(path) for path in files])
def plot(title, number, data, min_num, max_num, bin_num=100):
    """Draw a normalised histogram of `data` in one subplot of the current figure.

    `number` is the subplot position handed to plt.subplot (e.g. 131).
    The histogram uses `bin_num` evenly spaced edges from `min_num` to
    `max_num`, i.e. bin_num - 1 bins. The figure is not shown here.
    """
    edges = np.linspace(min_num, max_num, bin_num)
    plt.subplot(number)
    plt.title(title)
    # NOTE: `normed` was removed in matplotlib 3.1 (`density=True` replaces it
    # from 2.1 on); it is kept because this file targets a Python 2 stack.
    plt.hist(data, bins=edges, alpha=0.3, normed=True)
def plot_together(title, data, min_num, max_num, bin_num=100):
    """Overlay normalised histograms of the three surveys in a new figure.

    `data` holds the OGLE-III, VVV DR4 and Corot values, in that order; each
    item must provide `.mean()` and `.std()` (used for the legend labels,
    rounded to 4 decimals). All histograms share `bin_num` evenly spaced
    edges from `min_num` to `max_num`. The figure is shown before returning.
    """
    bins = np.linspace(min_num, max_num, bin_num)
    dec = 4
    surveys = (
        ('Ogle-III', ogle_color),
        ('VVVDR4', vista_color),
        ('Corot', corot_colot),
    )
    plt.figure()
    plt.title(title)
    for values, (survey, color) in zip(data, surveys):
        label = '%s -> Mean: %s, Std: %s' % (
            survey, round(values.mean(), dec), round(values.std(), dec))
        # `bins` used to be computed and then ignored, so each survey got its
        # own default binning and the overlaid bars were not comparable.
        # NOTE: `normed` was removed in matplotlib 3.1 (`density=True`
        # replaces it from 2.1 on); kept for the Python 2 stack this targets.
        plt.hist(values, bins, normed=True, alpha=0.5, color=color, label=label)
    plt.legend(loc='upper right')
    plt.show()
def plot_boxplot(title, data):
    """Show a styled box plot comparing the three surveys.

    `data` is the sequence of datasets handed to `Axes.boxplot`; the x tick
    labels are fixed to 'Ogle-III', 'VVVDR4', 'Corot', so three datasets in
    that order are expected. The plot is drawn on figure number 1 and shown
    before returning.

    The unused mean/std label strings that were built here have been dropped;
    they had no effect on the plot but required every dataset to offer
    `.mean()` and `.std()`.
    """
    fig = plt.figure(1, figsize=(16, 6))
    ax = fig.add_subplot(111)
    plt.title(title)
    bp = ax.boxplot(data, patch_artist=True)

    outline = '#7570b3'
    for box in bp['boxes']:
        # Two calls on purpose: the first sets the outline colour and width,
        # the second then applies the fill colour.
        box.set(color=outline, linewidth=2)
        box.set(facecolor='#1b9e77')
    # Whiskers and caps share the outline style.
    for part in ('whiskers', 'caps'):
        for line in bp[part]:
            line.set(color=outline, linewidth=2)
    for median in bp['medians']:
        median.set(color='#b2df8a', linewidth=2)
    # Outlier markers
    for flier in bp['fliers']:
        flier.set(marker='o', color='#e7298a', alpha=0.5)

    ax.set_xticklabels(['Ogle-III', 'VVVDR4', 'Corot'])
    # Keep ticks on the bottom and left axes only.
    ax.get_xaxis().tick_bottom()
    ax.get_yaxis().tick_left()
    plt.show()
# Root directories of each survey's feature files. The list position is the
# region index accepted by load_data; VVV only has the first two regions.
path_vista = ['./VVVDR4/' + region + '/' for region in ('blg', 'gd')]
path_ogle = ['./OGLE-III/' + region + '/rrlyr/'
             for region in ('blg', 'gd', 'lmc', 'smc')]
path_corot = './Corot/'
def load_data(idx, star):
    """Load the three surveys for one region and one star type.

    `idx` selects the region (position in `path_ogle` / `path_vista`) and
    `star` the star-type name used in the paths. Returns the tuple
    (vista, ogle, corot):

    - vista: DataFrame from open_vista, or `[]` when `path_vista` has no
      entry at `idx`;
    - ogle: dict from open_ogle built from at most `ogle_data` files;
    - corot: result of open_corot; the Corot path does not depend on `idx`.
    """
    if idx < len(path_vista):
        vista_db = open_vista(path_vista[idx] + star + vista_ext)
    else:
        vista_db = []

    ogle_files = glob.glob(path_ogle[idx] + star + '/*' + ogle_ext)
    ogle_db = open_ogle(ogle_files[0:ogle_data])

    corot_files = glob.glob(path_corot + star + '/' + corot_n + '/*' + corot_ext)
    corot_db = open_corot(corot_files)

    return vista_db, ogle_db, corot_db
def load_all_data(star, c=ogle_data):
    """Load the three surveys for one star type, pooling every region.

    `star` is the star-type name used in the paths; `c` caps how many OGLE
    files are taken from each region directory. Returns the tuple
    (vista, ogle, corot): the VVV tables of all `path_vista` regions stacked
    into one DataFrame, the dict built by open_ogle, and the result of
    open_corot.
    """
    # pd.concat replaces DataFrame.append (removed in pandas 2.0) and covers
    # every configured VVV region instead of hard-coding the first two.
    vista_db = pd.concat(
        [open_vista(root + star + vista_ext) for root in path_vista])

    files = []
    for root in path_ogle:
        files += glob.glob(root + star + '/*' + ogle_ext)[0:c]
    ogle_db = open_ogle(files)

    files = glob.glob(path_corot + star + '/' + corot_n + '/*' + corot_ext)
    corot_db = open_corot(files)
    return vista_db, ogle_db, corot_db
def filter_array(data, pcnt=outlier):
    """Drop non-finite values and far outliers from a numeric array.

    NaN and +/-inf entries are removed first. Of the rest, a value is kept
    when its distance from the median is at most the spread between the
    `pcnt` and `1 - pcnt` quantiles (2% and 98% by default). Returns the
    surviving values as a NumPy array.
    """
    finite = pd.Series(data[np.isfinite(data)])
    low, center, high = finite.quantile([pcnt, 0.50, 1 - pcnt])
    spread = high - low
    keep = (finite - center).abs() <= spread
    return finite[keep].values
def mostrar(vista, ogle, corot):
    """Print statistics and show comparison plots for every feature in fats_col_2.

    ("mostrar" is Spanish for "show".) `vista` and `corot` are indexed as
    `obj[name].values` and `ogle` as `ogle[name]`, where `name` is a feature
    name. For each feature the three value sets are cleaned with
    filter_array, their size/mean/std are printed, and a box plot, three
    side-by-side histograms and one overlaid histogram are shown.

    Processing is best-effort: a feature that fails for any reason (missing
    in one survey, no usable values, plotting error) is skipped, and the
    reason is printed instead of being silently discarded.
    """
    for row, name in enumerate(fats_col_2):
        try:
            # No figure is created up front: plot_boxplot opens its own, and a
            # figure made here was left behind, empty, on every skipped feature.
            ogle_db = filter_array(np.array(ogle[name]))
            vista_db = filter_array(vista[name].values)
            corot_db = filter_array(corot[name].values)

            # Single-string print calls behave the same on Python 2 and 3.
            print('\t  %s .-  %s' % (row, name))
            for survey, values in (('Ogle-III', ogle_db),
                                   ('VVVDR4', vista_db),
                                   ('Corot', corot_db)):
                print('\t \t - %s: N= %s  Mean= %s  Std= %s'
                      % (survey, len(values), values.mean(), values.std()))

            plot_boxplot('Boxplot', [ogle_db, vista_db, corot_db])

            # Common range so the histograms of all surveys share their bins.
            min_num = min(ogle_db.min(), vista_db.min(), corot_db.min())
            max_num = max(ogle_db.max(), vista_db.max(), corot_db.max())
            plot('Ogle', 131, ogle_db, min_num, max_num)
            plot('Vista', 132, vista_db, min_num, max_num)
            plot('Corot', 133, corot_db, min_num, max_num)
            plt.show()

            plot_together('All Togheter', (ogle_db, vista_db, corot_db), min_num, max_num)
            print('\n')
        except Exception as err:
            # Deliberately broad: one bad feature must not stop the others.
            print('\t \t [!] Skipping %s (%s: %s)'
                  % (name, type(err).__name__, err))
# All regions pooled: first star_types[1], then star_types[0].
for star in (star_types[1], star_types[0]):
    vista, ogle, corot = load_all_data(star)
    mostrar(vista, ogle, corot)

# Region by region (indices 0 and 1): star_types[0] first, then star_types[1].
for star in (star_types[0], star_types[1]):
    for idx in (0, 1):
        vista, ogle, corot = load_data(idx, star)
        mostrar(vista, ogle, corot)